{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1857ea93",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c936f055",
   "metadata": {},
   "outputs": [],
   "source": [
    "movies = pd.read_csv('tmdb_5000_movies.csv')\n",
    "credits = pd.read_csv('tmdb_5000_credits.csv') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "62a46c65",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>budget</th>\n",
       "      <th>genres</th>\n",
       "      <th>homepage</th>\n",
       "      <th>id</th>\n",
       "      <th>keywords</th>\n",
       "      <th>original_language</th>\n",
       "      <th>original_title</th>\n",
       "      <th>overview</th>\n",
       "      <th>popularity</th>\n",
       "      <th>production_companies</th>\n",
       "      <th>production_countries</th>\n",
       "      <th>release_date</th>\n",
       "      <th>revenue</th>\n",
       "      <th>runtime</th>\n",
       "      <th>spoken_languages</th>\n",
       "      <th>status</th>\n",
       "      <th>tagline</th>\n",
       "      <th>title</th>\n",
       "      <th>vote_average</th>\n",
       "      <th>vote_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>237000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>http://www.avatarmovie.com/</td>\n",
       "      <td>19995</td>\n",
       "      <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n",
       "      <td>en</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
       "      <td>150.437577</td>\n",
       "      <td>[{\"name\": \"Ingenious Film Partners\", \"id\": 289...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2009-12-10</td>\n",
       "      <td>2787965087</td>\n",
       "      <td>162.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...</td>\n",
       "      <td>Released</td>\n",
       "      <td>Enter the World of Pandora.</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>7.2</td>\n",
       "      <td>11800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>300000000</td>\n",
       "      <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n",
       "      <td>http://disney.go.com/disneypictures/pirates/</td>\n",
       "      <td>285</td>\n",
       "      <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n",
       "      <td>en</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
       "      <td>139.082615</td>\n",
       "      <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2007-05-19</td>\n",
       "      <td>961000000</td>\n",
       "      <td>169.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>At the end of the world, the adventure begins.</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>6.9</td>\n",
       "      <td>4500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>245000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>http://www.sonypictures.com/movies/spectre/</td>\n",
       "      <td>206647</td>\n",
       "      <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n",
       "      <td>en</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
       "      <td>107.376788</td>\n",
       "      <td>[{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...</td>\n",
       "      <td>[{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"...</td>\n",
       "      <td>2015-10-26</td>\n",
       "      <td>880674609</td>\n",
       "      <td>148.0</td>\n",
       "      <td>[{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},...</td>\n",
       "      <td>Released</td>\n",
       "      <td>A Plan No One Escapes</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>6.3</td>\n",
       "      <td>4466</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>250000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...</td>\n",
       "      <td>http://www.thedarkknightrises.com/</td>\n",
       "      <td>49026</td>\n",
       "      <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n",
       "      <td>en</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>Following the death of District Attorney Harve...</td>\n",
       "      <td>112.312950</td>\n",
       "      <td>[{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2012-07-16</td>\n",
       "      <td>1084939099</td>\n",
       "      <td>165.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>The Legend Ends</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>7.6</td>\n",
       "      <td>9106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>260000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>http://movies.disney.com/john-carter</td>\n",
       "      <td>49529</td>\n",
       "      <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n",
       "      <td>en</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
       "      <td>43.926995</td>\n",
       "      <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}]</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2012-03-07</td>\n",
       "      <td>284139100</td>\n",
       "      <td>132.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>Lost in our world, found in another.</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>6.1</td>\n",
       "      <td>2124</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      budget                                             genres  \\\n",
       "0  237000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "1  300000000  [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...   \n",
       "2  245000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "3  250000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...   \n",
       "4  260000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "\n",
       "                                       homepage      id  \\\n",
       "0                   http://www.avatarmovie.com/   19995   \n",
       "1  http://disney.go.com/disneypictures/pirates/     285   \n",
       "2   http://www.sonypictures.com/movies/spectre/  206647   \n",
       "3            http://www.thedarkknightrises.com/   49026   \n",
       "4          http://movies.disney.com/john-carter   49529   \n",
       "\n",
       "                                            keywords original_language  \\\n",
       "0  [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...                en   \n",
       "1  [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...                en   \n",
       "2  [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...                en   \n",
       "3  [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...                en   \n",
       "4  [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...                en   \n",
       "\n",
       "                             original_title  \\\n",
       "0                                    Avatar   \n",
       "1  Pirates of the Caribbean: At World's End   \n",
       "2                                   Spectre   \n",
       "3                     The Dark Knight Rises   \n",
       "4                               John Carter   \n",
       "\n",
       "                                            overview  popularity  \\\n",
       "0  In the 22nd century, a paraplegic Marine is di...  150.437577   \n",
       "1  Captain Barbossa, long believed to be dead, ha...  139.082615   \n",
       "2  A cryptic message from Bond’s past sends him o...  107.376788   \n",
       "3  Following the death of District Attorney Harve...  112.312950   \n",
       "4  John Carter is a war-weary, former military ca...   43.926995   \n",
       "\n",
       "                                production_companies  \\\n",
       "0  [{\"name\": \"Ingenious Film Partners\", \"id\": 289...   \n",
       "1  [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...   \n",
       "2  [{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...   \n",
       "3  [{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"...   \n",
       "4        [{\"name\": \"Walt Disney Pictures\", \"id\": 2}]   \n",
       "\n",
       "                                production_countries release_date     revenue  \\\n",
       "0  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2009-12-10  2787965087   \n",
       "1  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2007-05-19   961000000   \n",
       "2  [{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"...   2015-10-26   880674609   \n",
       "3  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2012-07-16  1084939099   \n",
       "4  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2012-03-07   284139100   \n",
       "\n",
       "   runtime                                   spoken_languages    status  \\\n",
       "0    162.0  [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...  Released   \n",
       "1    169.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
       "2    148.0  [{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},...  Released   \n",
       "3    165.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
       "4    132.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
       "\n",
       "                                          tagline  \\\n",
       "0                     Enter the World of Pandora.   \n",
       "1  At the end of the world, the adventure begins.   \n",
       "2                           A Plan No One Escapes   \n",
       "3                                 The Legend Ends   \n",
       "4            Lost in our world, found in another.   \n",
       "\n",
       "                                      title  vote_average  vote_count  \n",
       "0                                    Avatar           7.2       11800  \n",
       "1  Pirates of the Caribbean: At World's End           6.9        4500  \n",
       "2                                   Spectre           6.3        4466  \n",
       "3                     The Dark Knight Rises           7.6        9106  \n",
       "4                               John Carter           6.1        2124  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "3fe5f790",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4803, 20)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3410a6c8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movie_id</th>\n",
       "      <th>title</th>\n",
       "      <th>cast</th>\n",
       "      <th>crew</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19995</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n",
       "      <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>285</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n",
       "      <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>206647</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n",
       "      <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>49026</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n",
       "      <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>49529</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n",
       "      <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movie_id                                     title  \\\n",
       "0     19995                                    Avatar   \n",
       "1       285  Pirates of the Caribbean: At World's End   \n",
       "2    206647                                   Spectre   \n",
       "3     49026                     The Dark Knight Rises   \n",
       "4     49529                               John Carter   \n",
       "\n",
       "                                                cast  \\\n",
       "0  [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...   \n",
       "1  [{\"cast_id\": 4, \"character\": \"Captain Jack Spa...   \n",
       "2  [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...   \n",
       "3  [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...   \n",
       "4  [{\"cast_id\": 5, \"character\": \"John Carter\", \"c...   \n",
       "\n",
       "                                                crew  \n",
       "0  [{\"credit_id\": \"52fe48009251416c750aca23\", \"de...  \n",
       "1  [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...  \n",
       "2  [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...  \n",
       "3  [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...  \n",
       "4  [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "credits.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e91d93e4",
   "metadata": {},
   "outputs": [],
   "source": [
    "movies = movies.merge(credits,on='title')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b491943c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>budget</th>\n",
       "      <th>genres</th>\n",
       "      <th>homepage</th>\n",
       "      <th>id</th>\n",
       "      <th>keywords</th>\n",
       "      <th>original_language</th>\n",
       "      <th>original_title</th>\n",
       "      <th>overview</th>\n",
       "      <th>popularity</th>\n",
       "      <th>production_companies</th>\n",
       "      <th>...</th>\n",
       "      <th>runtime</th>\n",
       "      <th>spoken_languages</th>\n",
       "      <th>status</th>\n",
       "      <th>tagline</th>\n",
       "      <th>title</th>\n",
       "      <th>vote_average</th>\n",
       "      <th>vote_count</th>\n",
       "      <th>movie_id</th>\n",
       "      <th>cast</th>\n",
       "      <th>crew</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>237000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>http://www.avatarmovie.com/</td>\n",
       "      <td>19995</td>\n",
       "      <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n",
       "      <td>en</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
       "      <td>150.437577</td>\n",
       "      <td>[{\"name\": \"Ingenious Film Partners\", \"id\": 289...</td>\n",
       "      <td>...</td>\n",
       "      <td>162.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...</td>\n",
       "      <td>Released</td>\n",
       "      <td>Enter the World of Pandora.</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>7.2</td>\n",
       "      <td>11800</td>\n",
       "      <td>19995</td>\n",
       "      <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n",
       "      <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>300000000</td>\n",
       "      <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n",
       "      <td>http://disney.go.com/disneypictures/pirates/</td>\n",
       "      <td>285</td>\n",
       "      <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n",
       "      <td>en</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
       "      <td>139.082615</td>\n",
       "      <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...</td>\n",
       "      <td>...</td>\n",
       "      <td>169.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>At the end of the world, the adventure begins.</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>6.9</td>\n",
       "      <td>4500</td>\n",
       "      <td>285</td>\n",
       "      <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n",
       "      <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>245000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>http://www.sonypictures.com/movies/spectre/</td>\n",
       "      <td>206647</td>\n",
       "      <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n",
       "      <td>en</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
       "      <td>107.376788</td>\n",
       "      <td>[{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...</td>\n",
       "      <td>...</td>\n",
       "      <td>148.0</td>\n",
       "      <td>[{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},...</td>\n",
       "      <td>Released</td>\n",
       "      <td>A Plan No One Escapes</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>6.3</td>\n",
       "      <td>4466</td>\n",
       "      <td>206647</td>\n",
       "      <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n",
       "      <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>250000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...</td>\n",
       "      <td>http://www.thedarkknightrises.com/</td>\n",
       "      <td>49026</td>\n",
       "      <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n",
       "      <td>en</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>Following the death of District Attorney Harve...</td>\n",
       "      <td>112.312950</td>\n",
       "      <td>[{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"...</td>\n",
       "      <td>...</td>\n",
       "      <td>165.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>The Legend Ends</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>7.6</td>\n",
       "      <td>9106</td>\n",
       "      <td>49026</td>\n",
       "      <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n",
       "      <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>260000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>http://movies.disney.com/john-carter</td>\n",
       "      <td>49529</td>\n",
       "      <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n",
       "      <td>en</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
       "      <td>43.926995</td>\n",
       "      <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}]</td>\n",
       "      <td>...</td>\n",
       "      <td>132.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>Lost in our world, found in another.</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>6.1</td>\n",
       "      <td>2124</td>\n",
       "      <td>49529</td>\n",
       "      <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n",
       "      <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 23 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      budget                                             genres  \\\n",
       "0  237000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "1  300000000  [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...   \n",
       "2  245000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "3  250000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...   \n",
       "4  260000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "\n",
       "                                       homepage      id  \\\n",
       "0                   http://www.avatarmovie.com/   19995   \n",
       "1  http://disney.go.com/disneypictures/pirates/     285   \n",
       "2   http://www.sonypictures.com/movies/spectre/  206647   \n",
       "3            http://www.thedarkknightrises.com/   49026   \n",
       "4          http://movies.disney.com/john-carter   49529   \n",
       "\n",
       "                                            keywords original_language  \\\n",
       "0  [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...                en   \n",
       "1  [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...                en   \n",
       "2  [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...                en   \n",
       "3  [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...                en   \n",
       "4  [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...                en   \n",
       "\n",
       "                             original_title  \\\n",
       "0                                    Avatar   \n",
       "1  Pirates of the Caribbean: At World's End   \n",
       "2                                   Spectre   \n",
       "3                     The Dark Knight Rises   \n",
       "4                               John Carter   \n",
       "\n",
       "                                            overview  popularity  \\\n",
       "0  In the 22nd century, a paraplegic Marine is di...  150.437577   \n",
       "1  Captain Barbossa, long believed to be dead, ha...  139.082615   \n",
       "2  A cryptic message from Bond’s past sends him o...  107.376788   \n",
       "3  Following the death of District Attorney Harve...  112.312950   \n",
       "4  John Carter is a war-weary, former military ca...   43.926995   \n",
       "\n",
       "                                production_companies  ... runtime  \\\n",
       "0  [{\"name\": \"Ingenious Film Partners\", \"id\": 289...  ...   162.0   \n",
       "1  [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...  ...   169.0   \n",
       "2  [{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...  ...   148.0   \n",
       "3  [{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"...  ...   165.0   \n",
       "4        [{\"name\": \"Walt Disney Pictures\", \"id\": 2}]  ...   132.0   \n",
       "\n",
       "                                    spoken_languages    status  \\\n",
       "0  [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...  Released   \n",
       "1           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
       "2  [{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},...  Released   \n",
       "3           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
       "4           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
       "\n",
       "                                          tagline  \\\n",
       "0                     Enter the World of Pandora.   \n",
       "1  At the end of the world, the adventure begins.   \n",
       "2                           A Plan No One Escapes   \n",
       "3                                 The Legend Ends   \n",
       "4            Lost in our world, found in another.   \n",
       "\n",
       "                                      title vote_average vote_count movie_id  \\\n",
       "0                                    Avatar          7.2      11800    19995   \n",
       "1  Pirates of the Caribbean: At World's End          6.9       4500      285   \n",
       "2                                   Spectre          6.3       4466   206647   \n",
       "3                     The Dark Knight Rises          7.6       9106    49026   \n",
       "4                               John Carter          6.1       2124    49529   \n",
       "\n",
       "                                                cast  \\\n",
       "0  [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...   \n",
       "1  [{\"cast_id\": 4, \"character\": \"Captain Jack Spa...   \n",
       "2  [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...   \n",
       "3  [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...   \n",
       "4  [{\"cast_id\": 5, \"character\": \"John Carter\", \"c...   \n",
       "\n",
       "                                                crew  \n",
       "0  [{\"credit_id\": \"52fe48009251416c750aca23\", \"de...  \n",
       "1  [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...  \n",
       "2  [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...  \n",
       "3  [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...  \n",
       "4  [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...  \n",
       "\n",
       "[5 rows x 23 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies.head()\n",
    "# budget\n",
    "# homepage\n",
    "# id\n",
    "# original_language\n",
    "# original_title\n",
    "# popularity\n",
    "# production_comapny\n",
    "# production_countries\n",
    "# release-date(not sure)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "d517efd4",
   "metadata": {},
   "outputs": [],
   "source": [
    "movies = movies[['movie_id','title','overview','genres','keywords','cast','crew']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "6e0d16dc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movie_id</th>\n",
       "      <th>title</th>\n",
       "      <th>overview</th>\n",
       "      <th>genres</th>\n",
       "      <th>keywords</th>\n",
       "      <th>cast</th>\n",
       "      <th>crew</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19995</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n",
       "      <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n",
       "      <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>285</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
       "      <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n",
       "      <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n",
       "      <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n",
       "      <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>206647</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n",
       "      <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n",
       "      <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>49026</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>Following the death of District Attorney Harve...</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...</td>\n",
       "      <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n",
       "      <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n",
       "      <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>49529</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n",
       "      <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n",
       "      <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movie_id                                     title  \\\n",
       "0     19995                                    Avatar   \n",
       "1       285  Pirates of the Caribbean: At World's End   \n",
       "2    206647                                   Spectre   \n",
       "3     49026                     The Dark Knight Rises   \n",
       "4     49529                               John Carter   \n",
       "\n",
       "                                            overview  \\\n",
       "0  In the 22nd century, a paraplegic Marine is di...   \n",
       "1  Captain Barbossa, long believed to be dead, ha...   \n",
       "2  A cryptic message from Bond’s past sends him o...   \n",
       "3  Following the death of District Attorney Harve...   \n",
       "4  John Carter is a war-weary, former military ca...   \n",
       "\n",
       "                                              genres  \\\n",
       "0  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "1  [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...   \n",
       "2  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "3  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...   \n",
       "4  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "\n",
       "                                            keywords  \\\n",
       "0  [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...   \n",
       "1  [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...   \n",
       "2  [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...   \n",
       "3  [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...   \n",
       "4  [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...   \n",
       "\n",
       "                                                cast  \\\n",
       "0  [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...   \n",
       "1  [{\"cast_id\": 4, \"character\": \"Captain Jack Spa...   \n",
       "2  [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...   \n",
       "3  [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...   \n",
       "4  [{\"cast_id\": 5, \"character\": \"John Carter\", \"c...   \n",
       "\n",
       "                                                crew  \n",
       "0  [{\"credit_id\": \"52fe48009251416c750aca23\", \"de...  \n",
       "1  [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...  \n",
       "2  [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...  \n",
       "3  [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...  \n",
       "4  [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "acb58b32",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "movie_id    0\n",
       "title       0\n",
       "overview    3\n",
       "genres      0\n",
       "keywords    0\n",
       "cast        0\n",
       "crew        0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "122e570d",
   "metadata": {},
   "outputs": [],
   "source": [
    "movies.dropna(inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "135344a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "#movies.duplicated().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "be5c56eb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {\"id\": 878, \"name\": \"Science Fiction\"}]'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies.iloc[0].genres"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "d8b61c39",
   "metadata": {},
   "outputs": [],
   "source": [
    "#ast to convert string-list to list\n",
    "import ast"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "bfcfea46",
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert(text):\n",
    "    L = []\n",
    "    for i in ast.literal_eval(text):\n",
    "        L.append(i['name']) \n",
    "    return L "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d7af2b7",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "a643728f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movie_id</th>\n",
       "      <th>title</th>\n",
       "      <th>overview</th>\n",
       "      <th>genres</th>\n",
       "      <th>keywords</th>\n",
       "      <th>cast</th>\n",
       "      <th>crew</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19995</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
       "      <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n",
       "      <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n",
       "      <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n",
       "      <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>285</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
       "      <td>[Adventure, Fantasy, Action]</td>\n",
       "      <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n",
       "      <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n",
       "      <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>206647</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
       "      <td>[Action, Adventure, Crime]</td>\n",
       "      <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n",
       "      <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n",
       "      <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>49026</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>Following the death of District Attorney Harve...</td>\n",
       "      <td>[Action, Crime, Drama, Thriller]</td>\n",
       "      <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n",
       "      <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n",
       "      <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>49529</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
       "      <td>[Action, Adventure, Science Fiction]</td>\n",
       "      <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n",
       "      <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n",
       "      <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movie_id                                     title  \\\n",
       "0     19995                                    Avatar   \n",
       "1       285  Pirates of the Caribbean: At World's End   \n",
       "2    206647                                   Spectre   \n",
       "3     49026                     The Dark Knight Rises   \n",
       "4     49529                               John Carter   \n",
       "\n",
       "                                            overview  \\\n",
       "0  In the 22nd century, a paraplegic Marine is di...   \n",
       "1  Captain Barbossa, long believed to be dead, ha...   \n",
       "2  A cryptic message from Bond’s past sends him o...   \n",
       "3  Following the death of District Attorney Harve...   \n",
       "4  John Carter is a war-weary, former military ca...   \n",
       "\n",
       "                                          genres  \\\n",
       "0  [Action, Adventure, Fantasy, Science Fiction]   \n",
       "1                   [Adventure, Fantasy, Action]   \n",
       "2                     [Action, Adventure, Crime]   \n",
       "3               [Action, Crime, Drama, Thriller]   \n",
       "4           [Action, Adventure, Science Fiction]   \n",
       "\n",
       "                                            keywords  \\\n",
       "0  [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...   \n",
       "1  [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...   \n",
       "2  [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...   \n",
       "3  [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...   \n",
       "4  [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...   \n",
       "\n",
       "                                                cast  \\\n",
       "0  [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...   \n",
       "1  [{\"cast_id\": 4, \"character\": \"Captain Jack Spa...   \n",
       "2  [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...   \n",
       "3  [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...   \n",
       "4  [{\"cast_id\": 5, \"character\": \"John Carter\", \"c...   \n",
       "\n",
       "                                                crew  \n",
       "0  [{\"credit_id\": \"52fe48009251416c750aca23\", \"de...  \n",
       "1  [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...  \n",
       "2  [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...  \n",
       "3  [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...  \n",
       "4  [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies['genres'] = movies['genres'].apply(convert)\n",
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "d845d787",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movie_id</th>\n",
       "      <th>title</th>\n",
       "      <th>overview</th>\n",
       "      <th>genres</th>\n",
       "      <th>keywords</th>\n",
       "      <th>cast</th>\n",
       "      <th>crew</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19995</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
       "      <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n",
       "      <td>[culture clash, future, space war, space colon...</td>\n",
       "      <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n",
       "      <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>285</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
       "      <td>[Adventure, Fantasy, Action]</td>\n",
       "      <td>[ocean, drug abuse, exotic island, east india ...</td>\n",
       "      <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n",
       "      <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>206647</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
       "      <td>[Action, Adventure, Crime]</td>\n",
       "      <td>[spy, based on novel, secret agent, sequel, mi...</td>\n",
       "      <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n",
       "      <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>49026</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>Following the death of District Attorney Harve...</td>\n",
       "      <td>[Action, Crime, Drama, Thriller]</td>\n",
       "      <td>[dc comics, crime fighter, terrorist, secret i...</td>\n",
       "      <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n",
       "      <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>49529</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
       "      <td>[Action, Adventure, Science Fiction]</td>\n",
       "      <td>[based on novel, mars, medallion, space travel...</td>\n",
       "      <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n",
       "      <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movie_id                                     title  \\\n",
       "0     19995                                    Avatar   \n",
       "1       285  Pirates of the Caribbean: At World's End   \n",
       "2    206647                                   Spectre   \n",
       "3     49026                     The Dark Knight Rises   \n",
       "4     49529                               John Carter   \n",
       "\n",
       "                                            overview  \\\n",
       "0  In the 22nd century, a paraplegic Marine is di...   \n",
       "1  Captain Barbossa, long believed to be dead, ha...   \n",
       "2  A cryptic message from Bond’s past sends him o...   \n",
       "3  Following the death of District Attorney Harve...   \n",
       "4  John Carter is a war-weary, former military ca...   \n",
       "\n",
       "                                          genres  \\\n",
       "0  [Action, Adventure, Fantasy, Science Fiction]   \n",
       "1                   [Adventure, Fantasy, Action]   \n",
       "2                     [Action, Adventure, Crime]   \n",
       "3               [Action, Crime, Drama, Thriller]   \n",
       "4           [Action, Adventure, Science Fiction]   \n",
       "\n",
       "                                            keywords  \\\n",
       "0  [culture clash, future, space war, space colon...   \n",
       "1  [ocean, drug abuse, exotic island, east india ...   \n",
       "2  [spy, based on novel, secret agent, sequel, mi...   \n",
       "3  [dc comics, crime fighter, terrorist, secret i...   \n",
       "4  [based on novel, mars, medallion, space travel...   \n",
       "\n",
       "                                                cast  \\\n",
       "0  [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...   \n",
       "1  [{\"cast_id\": 4, \"character\": \"Captain Jack Spa...   \n",
       "2  [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...   \n",
       "3  [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...   \n",
       "4  [{\"cast_id\": 5, \"character\": \"John Carter\", \"c...   \n",
       "\n",
       "                                                crew  \n",
       "0  [{\"credit_id\": \"52fe48009251416c750aca23\", \"de...  \n",
       "1  [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...  \n",
       "2  [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...  \n",
       "3  [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...  \n",
       "4  [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies['keywords'] = movies['keywords'].apply(convert)\n",
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "511b76bd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'id': 28, 'name': 'Action'},\n",
       " {'id': 12, 'name': 'Adventure'},\n",
       " {'id': 14, 'name': 'Fantasy'},\n",
       " {'id': 878, 'name': 'Science Fiction'}]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import ast\n",
    "ast.literal_eval('[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {\"id\": 878, \"name\":\"Science Fiction\"}]')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "7024e6a0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert3(text):\n",
    "    L = []\n",
    "    counter = 0\n",
    "    for i in ast.literal_eval(text):\n",
    "        if counter < 3:\n",
    "            L.append(i['name'])\n",
    "        counter+=1\n",
    "    return L "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "dec536a8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movie_id</th>\n",
       "      <th>title</th>\n",
       "      <th>overview</th>\n",
       "      <th>genres</th>\n",
       "      <th>keywords</th>\n",
       "      <th>cast</th>\n",
       "      <th>crew</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19995</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
       "      <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n",
       "      <td>[culture clash, future, space war, space colon...</td>\n",
       "      <td>[Sam Worthington, Zoe Saldana, Sigourney Weave...</td>\n",
       "      <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>285</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
       "      <td>[Adventure, Fantasy, Action]</td>\n",
       "      <td>[ocean, drug abuse, exotic island, east india ...</td>\n",
       "      <td>[Johnny Depp, Orlando Bloom, Keira Knightley, ...</td>\n",
       "      <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>206647</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
       "      <td>[Action, Adventure, Crime]</td>\n",
       "      <td>[spy, based on novel, secret agent, sequel, mi...</td>\n",
       "      <td>[Daniel Craig, Christoph Waltz, Léa Seydoux, R...</td>\n",
       "      <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>49026</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>Following the death of District Attorney Harve...</td>\n",
       "      <td>[Action, Crime, Drama, Thriller]</td>\n",
       "      <td>[dc comics, crime fighter, terrorist, secret i...</td>\n",
       "      <td>[Christian Bale, Michael Caine, Gary Oldman, A...</td>\n",
       "      <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>49529</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
       "      <td>[Action, Adventure, Science Fiction]</td>\n",
       "      <td>[based on novel, mars, medallion, space travel...</td>\n",
       "      <td>[Taylor Kitsch, Lynn Collins, Samantha Morton,...</td>\n",
       "      <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movie_id                                     title  \\\n",
       "0     19995                                    Avatar   \n",
       "1       285  Pirates of the Caribbean: At World's End   \n",
       "2    206647                                   Spectre   \n",
       "3     49026                     The Dark Knight Rises   \n",
       "4     49529                               John Carter   \n",
       "\n",
       "                                            overview  \\\n",
       "0  In the 22nd century, a paraplegic Marine is di...   \n",
       "1  Captain Barbossa, long believed to be dead, ha...   \n",
       "2  A cryptic message from Bond’s past sends him o...   \n",
       "3  Following the death of District Attorney Harve...   \n",
       "4  John Carter is a war-weary, former military ca...   \n",
       "\n",
       "                                          genres  \\\n",
       "0  [Action, Adventure, Fantasy, Science Fiction]   \n",
       "1                   [Adventure, Fantasy, Action]   \n",
       "2                     [Action, Adventure, Crime]   \n",
       "3               [Action, Crime, Drama, Thriller]   \n",
       "4           [Action, Adventure, Science Fiction]   \n",
       "\n",
       "                                            keywords  \\\n",
       "0  [culture clash, future, space war, space colon...   \n",
       "1  [ocean, drug abuse, exotic island, east india ...   \n",
       "2  [spy, based on novel, secret agent, sequel, mi...   \n",
       "3  [dc comics, crime fighter, terrorist, secret i...   \n",
       "4  [based on novel, mars, medallion, space travel...   \n",
       "\n",
       "                                                cast  \\\n",
       "0  [Sam Worthington, Zoe Saldana, Sigourney Weave...   \n",
       "1  [Johnny Depp, Orlando Bloom, Keira Knightley, ...   \n",
       "2  [Daniel Craig, Christoph Waltz, Léa Seydoux, R...   \n",
       "3  [Christian Bale, Michael Caine, Gary Oldman, A...   \n",
       "4  [Taylor Kitsch, Lynn Collins, Samantha Morton,...   \n",
       "\n",
       "                                                crew  \n",
       "0  [{\"credit_id\": \"52fe48009251416c750aca23\", \"de...  \n",
       "1  [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...  \n",
       "2  [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...  \n",
       "3  [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...  \n",
       "4  [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...  "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies['cast'] = movies['cast'].apply(convert)\n",
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "ee640520",
   "metadata": {},
   "outputs": [],
   "source": [
    "movies['cast'] = movies['cast'].apply(lambda x:x[0:3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "7597779d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def fetch_director(text):\n",
    "    L = []\n",
    "    for i in ast.literal_eval(text):\n",
    "        if i['job'] == 'Director': \n",
    "            L.append(i['name'])\n",
    "    return L "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "913c4480",
   "metadata": {},
   "outputs": [],
   "source": [
    "movies['crew'] = movies['crew'].apply(fetch_director)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "d87dcda7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movie_id</th>\n",
       "      <th>title</th>\n",
       "      <th>overview</th>\n",
       "      <th>genres</th>\n",
       "      <th>keywords</th>\n",
       "      <th>cast</th>\n",
       "      <th>crew</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4702</th>\n",
       "      <td>79120</td>\n",
       "      <td>Weekend</td>\n",
       "      <td>After a drunken house party with his straight ...</td>\n",
       "      <td>[Drama, Romance]</td>\n",
       "      <td>[gay, great britain, one-night stand, independ...</td>\n",
       "      <td>[Tom Cullen, Chris New, Jonathan Race]</td>\n",
       "      <td>[Andrew Haigh]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1341</th>\n",
       "      <td>8470</td>\n",
       "      <td>John Q</td>\n",
       "      <td>John Quincy Archibald is a father and husband ...</td>\n",
       "      <td>[Drama, Thriller, Crime]</td>\n",
       "      <td>[father son relationship, chicago, heart attac...</td>\n",
       "      <td>[Denzel Washington, Robert Duvall, Anne Heche]</td>\n",
       "      <td>[Nick Cassavetes]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3758</th>\n",
       "      <td>8357</td>\n",
       "      <td>What the #$*! Do We (K)now!?</td>\n",
       "      <td>Amanda (Marlee Maitlin) is a divorced woman wh...</td>\n",
       "      <td>[Documentary]</td>\n",
       "      <td>[alternate dimension, new age, parallel world,...</td>\n",
       "      <td>[Marlee Matlin, Elaine Hendrix, Robert Blanche]</td>\n",
       "      <td>[William Arntz, Betsy Chasse]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2423</th>\n",
       "      <td>9792</td>\n",
       "      <td>The Hills Have Eyes</td>\n",
       "      <td>Based on Wes Craven's 1977 suspenseful cult cl...</td>\n",
       "      <td>[Horror, Thriller]</td>\n",
       "      <td>[ambush, new mexico, van, family holiday, axe ...</td>\n",
       "      <td>[Aaron Stanford, Kathleen Quinlan, Vinessa Shaw]</td>\n",
       "      <td>[Alexandre Aja]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297</th>\n",
       "      <td>1372</td>\n",
       "      <td>Blood Diamond</td>\n",
       "      <td>An ex-mercenary turned smuggler. A Mende fishe...</td>\n",
       "      <td>[Drama, Thriller, Action]</td>\n",
       "      <td>[rebel, journalist, journalism, loss of family...</td>\n",
       "      <td>[Leonardo DiCaprio, Djimon Hounsou, Jennifer C...</td>\n",
       "      <td>[Edward Zwick]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      movie_id                         title  \\\n",
       "4702     79120                       Weekend   \n",
       "1341      8470                        John Q   \n",
       "3758      8357  What the #$*! Do We (K)now!?   \n",
       "2423      9792           The Hills Have Eyes   \n",
       "297       1372                 Blood Diamond   \n",
       "\n",
       "                                               overview  \\\n",
       "4702  After a drunken house party with his straight ...   \n",
       "1341  John Quincy Archibald is a father and husband ...   \n",
       "3758  Amanda (Marlee Maitlin) is a divorced woman wh...   \n",
       "2423  Based on Wes Craven's 1977 suspenseful cult cl...   \n",
       "297   An ex-mercenary turned smuggler. A Mende fishe...   \n",
       "\n",
       "                         genres  \\\n",
       "4702           [Drama, Romance]   \n",
       "1341   [Drama, Thriller, Crime]   \n",
       "3758              [Documentary]   \n",
       "2423         [Horror, Thriller]   \n",
       "297   [Drama, Thriller, Action]   \n",
       "\n",
       "                                               keywords  \\\n",
       "4702  [gay, great britain, one-night stand, independ...   \n",
       "1341  [father son relationship, chicago, heart attac...   \n",
       "3758  [alternate dimension, new age, parallel world,...   \n",
       "2423  [ambush, new mexico, van, family holiday, axe ...   \n",
       "297   [rebel, journalist, journalism, loss of family...   \n",
       "\n",
       "                                                   cast  \\\n",
       "4702             [Tom Cullen, Chris New, Jonathan Race]   \n",
       "1341     [Denzel Washington, Robert Duvall, Anne Heche]   \n",
       "3758    [Marlee Matlin, Elaine Hendrix, Robert Blanche]   \n",
       "2423   [Aaron Stanford, Kathleen Quinlan, Vinessa Shaw]   \n",
       "297   [Leonardo DiCaprio, Djimon Hounsou, Jennifer C...   \n",
       "\n",
       "                               crew  \n",
       "4702                 [Andrew Haigh]  \n",
       "1341              [Nick Cassavetes]  \n",
       "3758  [William Arntz, Betsy Chasse]  \n",
       "2423                [Alexandre Aja]  \n",
       "297                  [Edward Zwick]  "
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#movies['overview'] = movies['overview'].apply(lambda x:x.split())\n",
    "movies.sample(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "77acdc21",
   "metadata": {},
   "outputs": [],
   "source": [
    "def collapse(L):\n",
    "    L1 = []\n",
    "    for i in L:\n",
    "        L1.append(i.replace(\" \",\"\"))\n",
    "    return L1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "b3439e7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "movies['cast'] = movies['cast'].apply(collapse)\n",
    "movies['crew'] = movies['crew'].apply(collapse)\n",
    "movies['genres'] = movies['genres'].apply(collapse)\n",
    "movies['keywords'] = movies['keywords'].apply(collapse)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "5684929f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movie_id</th>\n",
       "      <th>title</th>\n",
       "      <th>overview</th>\n",
       "      <th>genres</th>\n",
       "      <th>keywords</th>\n",
       "      <th>cast</th>\n",
       "      <th>crew</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19995</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
       "      <td>[Action, Adventure, Fantasy, ScienceFiction]</td>\n",
       "      <td>[cultureclash, future, spacewar, spacecolony, ...</td>\n",
       "      <td>[SamWorthington, ZoeSaldana, SigourneyWeaver]</td>\n",
       "      <td>[JamesCameron]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>285</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
       "      <td>[Adventure, Fantasy, Action]</td>\n",
       "      <td>[ocean, drugabuse, exoticisland, eastindiatrad...</td>\n",
       "      <td>[JohnnyDepp, OrlandoBloom, KeiraKnightley]</td>\n",
       "      <td>[GoreVerbinski]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>206647</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
       "      <td>[Action, Adventure, Crime]</td>\n",
       "      <td>[spy, basedonnovel, secretagent, sequel, mi6, ...</td>\n",
       "      <td>[DanielCraig, ChristophWaltz, LéaSeydoux]</td>\n",
       "      <td>[SamMendes]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>49026</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>Following the death of District Attorney Harve...</td>\n",
       "      <td>[Action, Crime, Drama, Thriller]</td>\n",
       "      <td>[dccomics, crimefighter, terrorist, secretiden...</td>\n",
       "      <td>[ChristianBale, MichaelCaine, GaryOldman]</td>\n",
       "      <td>[ChristopherNolan]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>49529</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
       "      <td>[Action, Adventure, ScienceFiction]</td>\n",
       "      <td>[basedonnovel, mars, medallion, spacetravel, p...</td>\n",
       "      <td>[TaylorKitsch, LynnCollins, SamanthaMorton]</td>\n",
       "      <td>[AndrewStanton]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movie_id                                     title  \\\n",
       "0     19995                                    Avatar   \n",
       "1       285  Pirates of the Caribbean: At World's End   \n",
       "2    206647                                   Spectre   \n",
       "3     49026                     The Dark Knight Rises   \n",
       "4     49529                               John Carter   \n",
       "\n",
       "                                            overview  \\\n",
       "0  In the 22nd century, a paraplegic Marine is di...   \n",
       "1  Captain Barbossa, long believed to be dead, ha...   \n",
       "2  A cryptic message from Bond’s past sends him o...   \n",
       "3  Following the death of District Attorney Harve...   \n",
       "4  John Carter is a war-weary, former military ca...   \n",
       "\n",
       "                                         genres  \\\n",
       "0  [Action, Adventure, Fantasy, ScienceFiction]   \n",
       "1                  [Adventure, Fantasy, Action]   \n",
       "2                    [Action, Adventure, Crime]   \n",
       "3              [Action, Crime, Drama, Thriller]   \n",
       "4           [Action, Adventure, ScienceFiction]   \n",
       "\n",
       "                                            keywords  \\\n",
       "0  [cultureclash, future, spacewar, spacecolony, ...   \n",
       "1  [ocean, drugabuse, exoticisland, eastindiatrad...   \n",
       "2  [spy, basedonnovel, secretagent, sequel, mi6, ...   \n",
       "3  [dccomics, crimefighter, terrorist, secretiden...   \n",
       "4  [basedonnovel, mars, medallion, spacetravel, p...   \n",
       "\n",
       "                                            cast                crew  \n",
       "0  [SamWorthington, ZoeSaldana, SigourneyWeaver]      [JamesCameron]  \n",
       "1     [JohnnyDepp, OrlandoBloom, KeiraKnightley]     [GoreVerbinski]  \n",
       "2      [DanielCraig, ChristophWaltz, LéaSeydoux]         [SamMendes]  \n",
       "3      [ChristianBale, MichaelCaine, GaryOldman]  [ChristopherNolan]  \n",
       "4    [TaylorKitsch, LynnCollins, SamanthaMorton]     [AndrewStanton]  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "25b16cae",
   "metadata": {},
   "outputs": [],
   "source": [
    "movies['overview'] = movies['overview'].apply(lambda x:x.split())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "a4cf3ba0",
   "metadata": {},
   "outputs": [],
   "source": [
    "movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "5b85a9ee",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movie_id</th>\n",
       "      <th>title</th>\n",
       "      <th>overview</th>\n",
       "      <th>genres</th>\n",
       "      <th>keywords</th>\n",
       "      <th>cast</th>\n",
       "      <th>crew</th>\n",
       "      <th>tags</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19995</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>[In, the, 22nd, century,, a, paraplegic, Marin...</td>\n",
       "      <td>[Action, Adventure, Fantasy, ScienceFiction]</td>\n",
       "      <td>[cultureclash, future, spacewar, spacecolony, ...</td>\n",
       "      <td>[SamWorthington, ZoeSaldana, SigourneyWeaver]</td>\n",
       "      <td>[JamesCameron]</td>\n",
       "      <td>[In, the, 22nd, century,, a, paraplegic, Marin...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>285</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>[Captain, Barbossa,, long, believed, to, be, d...</td>\n",
       "      <td>[Adventure, Fantasy, Action]</td>\n",
       "      <td>[ocean, drugabuse, exoticisland, eastindiatrad...</td>\n",
       "      <td>[JohnnyDepp, OrlandoBloom, KeiraKnightley]</td>\n",
       "      <td>[GoreVerbinski]</td>\n",
       "      <td>[Captain, Barbossa,, long, believed, to, be, d...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>206647</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>[A, cryptic, message, from, Bond’s, past, send...</td>\n",
       "      <td>[Action, Adventure, Crime]</td>\n",
       "      <td>[spy, basedonnovel, secretagent, sequel, mi6, ...</td>\n",
       "      <td>[DanielCraig, ChristophWaltz, LéaSeydoux]</td>\n",
       "      <td>[SamMendes]</td>\n",
       "      <td>[A, cryptic, message, from, Bond’s, past, send...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>49026</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>[Following, the, death, of, District, Attorney...</td>\n",
       "      <td>[Action, Crime, Drama, Thriller]</td>\n",
       "      <td>[dccomics, crimefighter, terrorist, secretiden...</td>\n",
       "      <td>[ChristianBale, MichaelCaine, GaryOldman]</td>\n",
       "      <td>[ChristopherNolan]</td>\n",
       "      <td>[Following, the, death, of, District, Attorney...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>49529</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>[John, Carter, is, a, war-weary,, former, mili...</td>\n",
       "      <td>[Action, Adventure, ScienceFiction]</td>\n",
       "      <td>[basedonnovel, mars, medallion, spacetravel, p...</td>\n",
       "      <td>[TaylorKitsch, LynnCollins, SamanthaMorton]</td>\n",
       "      <td>[AndrewStanton]</td>\n",
       "      <td>[John, Carter, is, a, war-weary,, former, mili...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movie_id                                     title  \\\n",
       "0     19995                                    Avatar   \n",
       "1       285  Pirates of the Caribbean: At World's End   \n",
       "2    206647                                   Spectre   \n",
       "3     49026                     The Dark Knight Rises   \n",
       "4     49529                               John Carter   \n",
       "\n",
       "                                            overview  \\\n",
       "0  [In, the, 22nd, century,, a, paraplegic, Marin...   \n",
       "1  [Captain, Barbossa,, long, believed, to, be, d...   \n",
       "2  [A, cryptic, message, from, Bond’s, past, send...   \n",
       "3  [Following, the, death, of, District, Attorney...   \n",
       "4  [John, Carter, is, a, war-weary,, former, mili...   \n",
       "\n",
       "                                         genres  \\\n",
       "0  [Action, Adventure, Fantasy, ScienceFiction]   \n",
       "1                  [Adventure, Fantasy, Action]   \n",
       "2                    [Action, Adventure, Crime]   \n",
       "3              [Action, Crime, Drama, Thriller]   \n",
       "4           [Action, Adventure, ScienceFiction]   \n",
       "\n",
       "                                            keywords  \\\n",
       "0  [cultureclash, future, spacewar, spacecolony, ...   \n",
       "1  [ocean, drugabuse, exoticisland, eastindiatrad...   \n",
       "2  [spy, basedonnovel, secretagent, sequel, mi6, ...   \n",
       "3  [dccomics, crimefighter, terrorist, secretiden...   \n",
       "4  [basedonnovel, mars, medallion, spacetravel, p...   \n",
       "\n",
       "                                            cast                crew  \\\n",
       "0  [SamWorthington, ZoeSaldana, SigourneyWeaver]      [JamesCameron]   \n",
       "1     [JohnnyDepp, OrlandoBloom, KeiraKnightley]     [GoreVerbinski]   \n",
       "2      [DanielCraig, ChristophWaltz, LéaSeydoux]         [SamMendes]   \n",
       "3      [ChristianBale, MichaelCaine, GaryOldman]  [ChristopherNolan]   \n",
       "4    [TaylorKitsch, LynnCollins, SamanthaMorton]     [AndrewStanton]   \n",
       "\n",
       "                                                tags  \n",
       "0  [In, the, 22nd, century,, a, paraplegic, Marin...  \n",
       "1  [Captain, Barbossa,, long, believed, to, be, d...  \n",
       "2  [A, cryptic, message, from, Bond’s, past, send...  \n",
       "3  [Following, the, death, of, District, Attorney...  \n",
       "4  [John, Carter, is, a, war-weary,, former, mili...  "
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "c8e5db66",
   "metadata": {},
   "outputs": [],
   "source": [
    "new = movies.drop(columns=['overview','genres','keywords','cast','crew'])\n",
    "#new.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "86337c80",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movie_id</th>\n",
       "      <th>title</th>\n",
       "      <th>tags</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19995</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>285</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>206647</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>49026</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>Following the death of District Attorney Harve...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>49529</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movie_id                                     title  \\\n",
       "0     19995                                    Avatar   \n",
       "1       285  Pirates of the Caribbean: At World's End   \n",
       "2    206647                                   Spectre   \n",
       "3     49026                     The Dark Knight Rises   \n",
       "4     49529                               John Carter   \n",
       "\n",
       "                                                tags  \n",
       "0  In the 22nd century, a paraplegic Marine is di...  \n",
       "1  Captain Barbossa, long believed to be dead, ha...  \n",
       "2  A cryptic message from Bond’s past sends him o...  \n",
       "3  Following the death of District Attorney Harve...  \n",
       "4  John Carter is a war-weary, former military ca...  "
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new['tags'] = new['tags'].apply(lambda x: \" \".join(x))\n",
    "new.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "b2096072",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "cv = CountVectorizer(max_features=5000,stop_words='english')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "78653bfc",
   "metadata": {},
   "outputs": [],
   "source": [
    "vector = cv.fit_transform(new['tags']).toarray()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "90f36092",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 0, 0, ..., 0, 0, 0],\n",
       "       [0, 0, 0, ..., 0, 0, 0],\n",
       "       [0, 0, 0, ..., 0, 0, 0],\n",
       "       ...,\n",
       "       [0, 0, 0, ..., 0, 0, 0],\n",
       "       [0, 0, 0, ..., 0, 0, 0],\n",
       "       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "e4a28860",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, ..., 0, 0, 0], dtype=int64)"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vector[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "d7809e16",
   "metadata": {},
   "outputs": [],
   "source": [
    "# cv.get_feature_names()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "4b71c2f2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4806, 5000)"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vector.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "12a944fa",
   "metadata": {},
   "outputs": [],
   "source": [
    "import nltk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "4abe934e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.stem.porter import PorterStemmer\n",
    "ps= PorterStemmer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "0ff40f63",
   "metadata": {},
   "outputs": [],
   "source": [
    "def stem(text):\n",
    "    y=[]\n",
    "    \n",
    "    for i in text.split():\n",
    "        y.append(ps.stem(i))\n",
    "        \n",
    "    return \" \".join(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "b98e0748",
   "metadata": {},
   "outputs": [],
   "source": [
    "new['tags'] = new['tags'].apply(stem) #stem is used to combine words of same meaning in one word"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "a8bab646",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics.pairwise import cosine_similarity \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "03d53db1",
   "metadata": {},
   "outputs": [],
   "source": [
    "similarity = cosine_similarity(vector) #calculate angle btw to vectors and find the sisimilarities\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "289f5e53",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1.        , 0.08964215, 0.06071767, ..., 0.02519763, 0.0277885 ,\n",
       "        0.        ],\n",
       "       [0.08964215, 1.        , 0.06350006, ..., 0.02635231, 0.        ,\n",
       "        0.        ],\n",
       "       [0.06071767, 0.06350006, 1.        , ..., 0.02677398, 0.        ,\n",
       "        0.        ],\n",
       "       ...,\n",
       "       [0.02519763, 0.02635231, 0.02677398, ..., 1.        , 0.07352146,\n",
       "        0.04774099],\n",
       "       [0.0277885 , 0.        , 0.        , ..., 0.07352146, 1.        ,\n",
       "        0.05264981],\n",
       "       [0.        , 0.        , 0.        , ..., 0.04774099, 0.05264981,\n",
       "        1.        ]])"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "similarity\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "878adeff",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "744"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new[new['title'] == 'The Lego Movie'].index[0]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "00337383",
   "metadata": {},
   "outputs": [],
   "source": [
    "def recommend(movie):\n",
    "    index = new[new['title'] == movie].index[0]\n",
    "    movie_list = sorted(list(enumerate(similarity[index])),reverse=True,key = lambda x: x[1])\n",
    "    for i in movie_list[1:6]:\n",
    "        print(new.iloc[i[0]].title)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "67047f9b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The Dark Knight\n",
      "The Dark Knight Rises\n",
      "Batman\n",
      "Batman & Robin\n",
      "Batman\n"
     ]
    }
   ],
   "source": [
    "recommend('Batman Begins')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "ef4ae40b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "421e14ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "pickle.dump(new,open('movie_list.pkl','wb'))\n",
    "pickle.dump(similarity,open('similarity.pkl','wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "e8c97e90",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: pandas in d:\\desktop\\anaconda23\\lib\\site-packages (1.5.3)\n",
      "Requirement already satisfied: numpy>=1.21.0 in d:\\desktop\\anaconda23\\lib\\site-packages (from pandas) (1.23.5)\n",
      "Requirement already satisfied: python-dateutil>=2.8.1 in d:\\desktop\\anaconda23\\lib\\site-packages (from pandas) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2020.1 in d:\\desktop\\anaconda23\\lib\\site-packages (from pandas) (2022.7)\n",
      "Requirement already satisfied: six>=1.5 in d:\\desktop\\anaconda23\\lib\\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install pandas --version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "33b2c364",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.5.3\n"
     ]
    }
   ],
   "source": [
    "!python -c \"import pandas as pd; print(pd.__version__)\"\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
